Libraries & functions

First, I’m loading the required libraries & functions

suppressPackageStartupMessages(library(Seurat, lib.loc = "/software/Seuratv4/lib/")) # Seurat v4.4.0, library for single-cell analysis
suppressPackageStartupMessages(library(SeuratObject, lib.loc = "/software/Seuratv4/lib/")) # For compatibility purpose
suppressPackageStartupMessages(library(data.table)) # For writing DE gene file
suppressPackageStartupMessages(library(ggplot2)) # For plotting
suppressPackageStartupMessages(library(ggpubr)) # For grid plotting
suppressPackageStartupMessages(library(plotly)) # For interactive plots
suppressPackageStartupMessages(library(harmony)) # For integration
suppressPackageStartupMessages(library(bio3d)) # For plotting hierarchical clustering
suppressPackageStartupMessages(library(crayon)) # Just for bolding the console output :D

cat(bold("Seurat"), "version", as.character(packageVersion("Seurat")), "\n")
## Seurat version 4.4.0
cat(bold("data.table"), "version", as.character(packageVersion("data.table")), "\n")
## data.table version 1.16.4
cat(bold("ggplot2"), "version", as.character(packageVersion("ggplot2")), "\n")
## ggplot2 version 3.5.1
cat(bold("ggpubr"), "version", as.character(packageVersion("ggpubr")), "\n")
## ggpubr version 0.6.0
cat(bold("plotly"), "version", as.character(packageVersion("plotly")), "\n")
## plotly version 4.10.4
cat(bold("harmony"), "version", as.character(packageVersion("harmony")), "\n")
## harmony version 1.2.3
cat(bold("bio3d"), "version", as.character(packageVersion("bio3d")), "\n")
## bio3d version 2.4.5
# Color-blind friendly palette 3-colors
cbPalette <- c("#E69F00", "#000000", "#56B4E9")
steinPalette <- c("#80C980", "#BDADD4", "#376CB0", "#FBBF85", "#F0027E")
tissuePalette <- list(`Pan_neuro_control`="#E69F00", `Pan_neuro_ND75KD`="#56B4E9")
celltypePalette <- list(`unannotated`="#D3D3D3", `epithelial cell`="#E0AC69", `skeletal muscle of head`="#895129", `cone cell`="#FFFFB7", `outer photoreceptor cell`="#FFF060", `photoreceptor cell`="#F2DB00", `photoreceptor-like cell`="#F0C200", `adult optic chiasma glial cell`="#d896ff", `adult brain cell body glial cell`="#efbbff", `adult brain perineurial glial cell`="#BCBDDC", `adult lamina epithelial/marginal glial cell`="#9E9AC8", `adult reticular neuropil associated glial cell`="#756BB1", `optic lobe associated cortex glial cell`="#54278F", `cholinergic neuron`="#E41A1C", `gabaergic neuron`="#377EB8", `glutamatergic neuron`="#4DAF4A", `dopaminergic neuron`="#984EA3", `serotonergic neuron`="#FF7F00", `OPN neuron`="#A6CEE3", `columnar neuron T1`="#DEEBF7", `T neuron T3`="#9ECAE1", `T neuron T4/T5`="#3182BD", `distal medullary amacrine neuron Dm3`="#c6e6ba", `distal medullary amacrine neuron Dm8`="#A1D99B", `distal medullary amacrine neuron Dm9`="#31A354", `proximal medullary amacrine neuron Pm4`="#006D2C", `centrifugal neuron C2`="#7FC97F", `centrifugal neuron C3`="#BEAED4", `medullary intrinsic neuron Mi1`="#d8d5eb", `medullary intrinsic neuron Mi4`="#BCBDDC", `medullary intrinsic neuron Mi15`="#756BB1", `transmedullary neuron Tm1`="#b4dee9", `transmedullary neuron Tm2`="#bfdfe7", `transmedullary neuron Tm9`="#7eb5c4", `transmedullary neuron Tm20`="#9fcad5", `transmedullary Y neuron TmY14`="#5da1b3", `transmedullary Y neuron TmY5a`="#3c8ca2", `transmedullary Y neuron TmY8`="#1a7790", `tm5ab`="#014636", `lamina monopolar neuron L1 + L2`="#FDD3A0", `lamina monopolar neuron L3`="#FDBE85", `lamina monopolar neuron L4`="#FD8D3C", `lamina monopolar neuron L5`="#E6550D", `lamina wide-field 2 neuron`="#A63603", `lobular columnar neuron LC12`="#BC80BD", `olfactory receptor neuron`="#CCEBC5", `Poxn neuron`="#FFED6F", `hemocyte`="#999999", `kenyon cell`="#B3E2CD", `gamma kenyon cell`="#FDCDAC", `pigment cell`="#EE82EE")
clusterPalette <- list(`0`="#76C15D", `1`="#539DA4", `2`="#B15928", `3`="#78B69A", `4`="#46A93A", `5`="#EA4041", `6`="#E42123", `7`="#A5D880", `8`="#FDB35B", `9`="#F29A94", `10`="#449F35", `11`="#F98F8F", `12`="#2F83AF", `13`="#A6CEE3", `14`="#84B8D7", `15`="#85C969", `16`="#DBA08D", `17`="#C09B79", `18`="#F68724", `19`="#9CCF90", `20`="#2078B4", `21`="#5D9E43", `22`="#E7392B", `23`="#6D4199", `24`="#E73132", `25`="#E42521", `26`="#F89F5F", `27`="#37A22F", `28`="#8AC395", `29`="#D99B86", `30`="#63A3CB", `31`="#9774B6", `32`="#418EC0", `33`="#CEADC2", `34`="#4190AA", `35`="#F6807F", `36`="#A79C6B", `37`="#F1764A", `38`="#5298C5", `39`="#95D074", `40`="#F37070", `41`="#A48999", `42`="#8F9D5E", `43`="#EEE999", `44`="#AEDC8A", `45`="#FE8B16", `46`="#66A99F", `47`="#EF8D3E", `48`="#56B146", `49`="#FE9324", `50`="#BA9FCC", `51`="#FE8408", `52`="#FEFD97", `53`="#F06060", `54`="#E89458", `55`="#EE6240", `56`="#EB4D36", `57`="#FE9B31", `58`="#769D50", `59`="#7F5999", `60`="#66B952", `61`="#FC8109", `62`="#95C3DD", `63`="#73AED1", `64`="#DBD199", `65`="#C6AED3", `66`="#D5A7A8", `67`="#E29A73", `68`="#8B65AE", `69`="#3183BA", `70`="#AF91C5", `71`="#FBB369", `72`="#FDA33F", `73`="#C9B999", `74`="#FDAB4D", `75`="#7F57A7", `76`="#A382BD", `77`="#927199", `78`="#F48B54", `79`="#E1BF6D", `80`="#FDBB68", `81`="#ED5051", `82`="#F4E889", `83`="#D7AB5F", `84`="#C48243", `85`="#7348A0", `86`="#EAD47B", `87`="#B7A199", `88`="#CD9651", `89`="#BA6D35")
annotatedClusterPalette <- list(`0: unannotated`="#76C15D", `1: unannotated`="#539DA4", `2: unannotated`="#B15928", `3: glutamatergic neuron`="#78B69A", `4: unannotated`="#46A93A", `5: unannotated`="#EA4041", `6: epithelial cell`="#E42123", `7: transmedullary Y neuron TmY8`="#A5D880", `8: cone cell`="#FDB35B", `9: photoreceptor cell`="#F29A94", `10: photoreceptor cell`="#449F35", `11: cone cell`="#F98F8F", `12: cholinergic neuron`="#2F83AF", `13: skeletal muscle of head`="#A6CEE3", `14: gabaergic neuron`="#84B8D7", `15: unannotated`="#85C969", `16: medullary intrinsic neuron Mi4`="#DBA08D", `17: unannotated`="#C09B79", `18: outer photoreceptor cell`="#F68724", `19: epithelial cell`="#9CCF90", `20: skeletal muscle of head`="#2078B4", `21: gabaergic neuron`="#5D9E43", `22: unannotated`="#E7392B", `23: unannotated`="#6D4199", `24: cone cell`="#E73132", `25: T neuron T4/T5`="#E42521", `26: OPN neuron`="#F89F5F", `27: kenyon cell`="#37A22F", `28: outer photoreceptor cell`="#8AC395", `29: olfactory receptor neuron`="#D99B86", `30: unannotated`="#63A3CB", `31: unannotated`="#9774B6", `32: gamma kenyon cell`="#418EC0", `33: unannotated`="#CEADC2", `34: distal medullary amacrine neuron Dm3`="#4190AA", `35: serotonergic neuron`="#F6807F", `36: lamina monopolar neuron L1 + L2`="#A79C6B", `37: adult reticular neuropil associated glial cell`="#F1764A", `38: epithelial cell`="#5298C5", `39: adult optic chiasma glial cell`="#95D074", `40: unannotated`="#F37070", `41: epithelial cell`="#A48999", `42: tm5ab`="#8F9D5E", `43: unannotated`="#EEE999", `44: T neuron T3`="#AEDC8A", `45: unannotated`="#FE8B16", `46: unannotated`="#66A99F", `47: pigment cell`="#EF8D3E", `48: epithelial cell`="#56B146", `49: lobular columnar neuron LC12`="#FE9324", `50: centrifugal neuron C2`="#BA9FCC", `51: Poxn neuron`="#FE8408", `52: transmedullary Y neuron TmY5a`="#FEFD97", `53: lamina monopolar neuron L4`="#F06060", `54: photoreceptor-like cell`="#E89458", `55: lamina wide-field 2 neuron`="#EE6240", `56: columnar neuron T1`="#EB4D36", `57: adult brain cell body glial cell`="#FE9B31", `58: dopaminergic neuron`="#769D50", `59: lamina monopolar neuron L3`="#7F5999", `60: transmedullary neuron Tm1`="#66B952", `61: medullary intrinsic neuron Mi1`="#FC8109", `62: medullary intrinsic neuron Mi4`="#95C3DD", `63: transmedullary neuron Tm9`="#73AED1", `64: adult brain perineurial glial cell`="#DBD199", `65: skeletal muscle of head`="#C6AED3", `66: adult lamina epithelial/marginal glial cell`="#D5A7A8", `67: transmedullary neuron Tm20`="#E29A73", `68: hemocyte`="#8B65AE", `69: adult optic chiasma glial cell`="#3183BA", `70: unannotated`="#AF91C5", `71: transmedullary neuron Tm2`="#FBB369", `72: lamina monopolar neuron L5`="#FDA33F", `73: glutamatergic neuron`="#C9B999", `74: epithelial cell`="#FDAB4D", `75: cholinergic neuron`="#7F57A7", `76: centrifugal neuron C3`="#A382BD", `77: unannotated`="#927199", `78: distal medullary amacrine neuron Dm9`="#F48B54", `79: distal medullary amacrine neuron Dm8`="#E1BF6D", `80: medullary intrinsic neuron Mi15`="#FDBB68", `81: proximal medullary amacrine neuron Pm4`="#ED5051", `82: optic lobe associated cortex glial cell`="#F4E889", `83: unannotated`="#D7AB5F", `84: OPN neuron`="#C48243", `85: unannotated`="#7348A0", `86: unannotated`="#EAD47B", `87: unannotated`="#B7A199", `88: unannotated`="#CD9651", `89: transmedullary Y neuron TmY14`="#BA6D35")

# Random seed
set.seed(42)

Parameters

input_seurat_object <- "./data/Pan_neuro_integrated_FINAL.rds"
input_fca_head_object <- "./data/Fly_Cell_Atlas__Head_Stringent.rds"
output_figure_prefix <- "./figures/Figure_4A/"

I.1 Reading Seurat object

First step is to read the Seurat objects, previously created

message("Loading Seurat object...")
## Loading Seurat object...
data.seurat <- readRDS(input_seurat_object)
message(ncol(data.seurat), " cells were loaded")
## 23179 cells were loaded
message(nrow(data.seurat), " genes were loaded")
## 23932 genes were loaded

II.1. First: re-integrate and force integration of these clusters into the FCA data

# Read FCA object
data.seurat_fca <- readRDS(input_fca_head_object)
message(ncol(data.seurat_fca), " cells were loaded")
## 100527 cells were loaded
message(nrow(data.seurat_fca), " genes were loaded")
## 13056 genes were loaded
# Create a single Seurat object
data.seurat_reintegrated <- merge(x = data.seurat, y = data.seurat_fca, project = "Pan_neuro_FCA_reintegrated")

# Filling NAs in metadata
data.seurat_reintegrated$batch[data.seurat_reintegrated$orig.ident == "Pan_neuro_control"] <- 13
data.seurat_reintegrated$batch[data.seurat_reintegrated$orig.ident == "Pan_neuro_ND75KD"] <- 14

# Size of the new Seurat object
message(ncol(data.seurat_reintegrated), " cells in integrated Seurat object")
## 123706 cells in integrated Seurat object
message(nrow(data.seurat_reintegrated), " genes in total (union)")
## 23949 genes in total (union)
# By default, all non-overlapping genes are set to 0. Removing genes that are not overlapping between the two datasets.
data.seurat_reintegrated <- data.seurat_reintegrated[sort(intersect(rownames(data.seurat_fca), rownames(data.seurat))),]

# Size of the new Seurat object
message(nrow(data.seurat_reintegrated), " genes in integrated Seurat object (intersection)")
## 13039 genes in integrated Seurat object (intersection)
# Default pipeline
data.seurat_reintegrated <- NormalizeData(data.seurat_reintegrated, normalization.method = "LogNormalize", scale.factor = 10000, verbose = F)
data.seurat_reintegrated <- FindVariableFeatures(data.seurat_reintegrated, selection.method = "vst", nfeatures = 2000, verbose = F)
data.seurat_reintegrated <- ScaleData(data.seurat_reintegrated, verbose = F)
data.seurat_reintegrated <- RunPCA(data.seurat_reintegrated, features = VariableFeatures(data.seurat_reintegrated), verbose = F, npcs = 100)
# Run Harmony
# Here I force Harmony to run on 30 iterations, without early stop. To try forcing the integration of the 2/23 cluster
data.seurat_reintegrated <- RunHarmony(object = data.seurat_reintegrated, group.by.vars = "batch", plot_convergence = TRUE, max_iter = 30, early_stop = F, verbose = F)

## Warning: Invalid name supplied, making object name syntactically valid. New
## object name is Seurat..ProjectDim.RNA.harmony; see ?make.names for more details
## on syntax validity
# Clustering
data.seurat_reintegrated <- FindNeighbors(data.seurat_reintegrated, dims = 1:100, reduction = "harmony", verbose = F)
# I use a resolution of 4, which is completely arbitrary, we may tune it to have more/less clusters
data.seurat_reintegrated <- FindClusters(data.seurat_reintegrated, resolution = 4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 123706
## Number of edges: 7265464
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9057
## Number of communities: 136
## Elapsed time: 40 seconds
## 19 singletons identified. 117 final clusters.
# UMAP (visualization)
data.seurat_reintegrated <- RunUMAP(data.seurat_reintegrated, dims = 1:100, reduction = "harmony", verbose = F)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
DimPlot(data.seurat_reintegrated, reduction = "umap", pt.size = 1, group.by = "batch", raster = F)

# UMAP (visualization)
# Clusters 2/23 are now both in cluster 45
DimPlot(data.seurat_reintegrated, reduction = "umap", pt.size = 1, group.by = "seurat_clusters", raster = F)

data.seurat_reintegrated$annotation_clusters[is.na(data.seurat_reintegrated$annotation_clusters)] <- paste0("FCA_", data.seurat_reintegrated$annotation[is.na(data.seurat_reintegrated$annotation_clusters)])

# Generate the DimPlot with these specified colors
p <- DimPlot(data.seurat_reintegrated, reduction = "umap", label = F, pt.size = 0.5, group.by = "annotation_clusters", raster = F) + NoLegend()
p <- LabelClusters(p, id = "annotation_clusters",  fontface = "bold", repel = F)
p

It seems it did not merge again… we can see it at the top, near the Poxn / OPN neurons clusters

II.2 Hierachical clustering

II.2.1 On integrated Seurat clusters (FCA + Pan neuro)

# 1. Summarize the data by cluster
clusters <- as.character(sort(unique(data.seurat_reintegrated$seurat_clusters)))
data.results <- data.frame(matrix(0, nrow = length(clusters), ncol = ncol(data.seurat_reintegrated@reductions$harmony@cell.embeddings)), row.names = clusters)
colnames(data.results) <- colnames(data.seurat_reintegrated@reductions$harmony@cell.embeddings)
for(clust in clusters){
  data.results[clust,] <- colMeans(data.seurat_reintegrated@reductions$harmony@cell.embeddings[data.seurat_reintegrated$seurat_clusters == clust,])
}
# 2. Hierarchical clustering
hc <- hclust(dist(data.results, method = "euclidean"), method = "ward.D2")
hclustplot(hc, k = 40)

Cluster 45 is close to the big blob in the top-center of the integrated UMAP (109, 71, 90 ,2 ,3)

II.2.2 On Emma + FCA annotation (FCA + Pan neuro)

# 1. Summarize the data by cluster
clusters <- as.character(sort(unique(data.seurat_reintegrated$annotation_clusters)))
data.results <- data.frame(matrix(0, nrow = length(clusters), ncol = ncol(data.seurat_reintegrated@reductions$harmony@cell.embeddings)), row.names = clusters)
colnames(data.results) <- colnames(data.seurat_reintegrated@reductions$harmony@cell.embeddings)
for(clust in clusters){
  data.results[clust,] <- colMeans(data.seurat_reintegrated@reductions$harmony@cell.embeddings[data.seurat_reintegrated$annotation_clusters == clust,])
}
# 2. Hierarchical clustering
hc <- hclust(dist(data.results, method = "euclidean"), method = "ward.D2")
hclustplot(hc, h = 20, mar = c(15,1,1,1))

Save plot

pdf(paste0(output_figure_prefix, "hclust_FCA_integration.pdf"), height = 8, width = 20)
hclustplot(hc, h = 20, mar = c(15,1,1,1))
dev.off()
## png 
##   2